from IPython.display import HTML
# Notebook helper: builds an IPython HTML object holding a small script plus a
# one-button form. The script defines code_toggle(), which hides every
# 'div.input' element while code_show is true and shows them otherwise, then
# flips the flag. code_toggle is registered to run once when the document is
# ready (so inputs start hidden) and again each time the button is submitted.
# NOTE(review): `$` is presumably jQuery as provided by the notebook front-end,
# and this expression presumably has to be the last one in its notebook cell
# for the HTML to be rendered -- confirm; as a plain script statement its
# value is simply discarded.
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
import datetime
import matplotlib.pyplot as plt
from arch.unitroot import VarianceRatio
from itertools import groupby
from scipy.stats import norm
from sklearn.metrics import r2_score
from statsmodels.robust.scale import mad
from sgmbasketball.models.factor_model.play_by_play_data import PlayByPlayCleanData
from stratagemdataprocessing.data_api import find_basketball_events
from sgmresearchbase.coint.space import normalise
from sgmresearchbase.coint.common import hurst_naive
from sgmresearchbase.coint.services import CointegrationService, _zs
from sgmresearchbase.coint.projection import interpolate
from IPython.display import clear_output
# Plot styling applied to every figure created after this point.
plt.style.use('ggplot')
plt.rc('figure', figsize=(16, 6))

# Date window used both for the event lookup and for the play-by-play load.
START_DT = datetime.datetime(year=2016, month=10, day=1)
END_DT = datetime.datetime(year=2017, month=6, day=1)

# Event metadata for the window, and the subset whose stage is 'NBA'.
ALL_EVENTS = find_basketball_events(START_DT, END_DT, True)
NBA_EVENTS = filter(lambda event: event['stage_name'] == 'NBA', ALL_EVENTS)

# Play-by-play data; the loader takes the window bounds as 'YYYY-MM-DD' text.
PBP = PlayByPlayCleanData(
    START_DT.strftime('%Y-%m-%d'),
    END_DT.strftime('%Y-%m-%d'),
    'NBA',
    'pbp',
    fixture_filter=None
).get_data_ready()

# Wipe whatever the loading steps printed (presumably progress noise).
clear_output()
def get_basis(vals, model=1):
    """Return one length-2 basis vector for the series in *vals*.

    Parameters
    ----------
    vals : array-like with a ``shape`` attribute
        Observations, one row per event. Forwarded unchanged to
        ``CointegrationService.get_cointegrating_bases``.
    model : int, optional
        Forwarded unchanged as the ``model`` keyword of that service call.

    Returns
    -------
    numpy.ndarray
        * ``zeros(2)`` when *vals* has no rows or the service returns ``None``;
        * otherwise the first column (column 0, then column 1) of the
          service result whose two entries have opposite signs, flipped if
          needed so that its first entry is not negative;
        * column 0 multiplied by ``0.0`` when neither column qualifies.
    """
    if vals.shape[0] < 1:
        return np.zeros((2,))

    candidates = CointegrationService.get_cointegrating_bases(vals, model=model)
    if candidates is None:
        return np.zeros((2,))

    # Only a column whose two weights pull in opposite directions is accepted.
    for col in (0, 1):
        if candidates[0, col] * candidates[1, col] < 0:
            chosen = candidates[:, col]
            break
    else:
        # No usable column: fall back to an all-zero vector of the same shape.
        chosen = candidates[:, 0] * 0.0

    # Sign convention: report the vector with a non-negative first component.
    return -chosen if chosen[0] < 0 else chosen
def compare_betas(b1, b2):
    """Return the Euclidean distance between two basis vectors, ignoring a
    global sign flip.

    If every component of *b2* has exactly the opposite sign of the matching
    component of *b1*, *b2* is first re-signed to follow *b1* (its magnitudes
    are kept); otherwise *b2* is used as given. The result is the norm of
    ``b1`` minus that reference vector.
    """
    sign_b1 = np.sign(b1)
    is_mirrored = (sign_b1 == -np.sign(b2)).all()
    reference = np.abs(b2) * sign_b1 if is_mirrored else b2
    return np.linalg.norm(b1 - reference)
def plot_scores(scores, ax):
    """Plot *scores* on *ax* and label the panel.

    *scores* is handed straight to ``ax.plot``; the legend names two series,
    'Team 1' and 'Team 2'.
    """
    team_labels = ['Team 1', 'Team 2']
    ax.plot(scores)
    ax.set_xlabel('Event [-]')
    ax.set_ylabel('Score [-]')
    # The legend is added after plotting so it picks up the drawn lines.
    ax.legend(team_labels, loc='best')
def plot_bases(bases, ax, actual_basis=None):
    """Plot the per-event basis estimates as step lines on *ax*.

    Parameters
    ----------
    bases : array-like
        Basis estimates, handed straight to ``ax.plot``.
    ax : matplotlib axes
        Target axes.
    actual_basis : sequence of two numbers, optional
        Reference basis. When given, a black horizontal line is drawn at each
        of its first two values. Previously this was read from an undefined
        global, which raised NameError; it is now an explicit argument.
    """
    ax.plot(bases, drawstyle='steps-post')
    if actual_basis is not None:
        for level in (actual_basis[0], actual_basis[1]):
            ax.axhline(level, color='k')
    ax.set_xlabel('Event [-]')
    ax.set_ylabel('Basis value [-]')


def plot_residuals(bases, actual_basis, ax):
    """Scatter the first component of ``bases - actual_basis`` against the
    row index of *bases*."""
    event_idx = np.arange(bases.shape[0])
    first_component_residual = (bases - actual_basis)[:, 0]
    ax.scatter(event_idx, first_component_residual)
    ax.set_xlabel('Event [-]')
    ax.set_ylabel('Residual $\\beta_0 - \\tilde{\\beta_0}$ [-]')


def plot_errors(bases, actual_basis, ax, n_phases=4):
    """Plot, per phase, the distribution of estimate errors on *ax*.

    Rows of *bases* whose absolute values sum to at most 1e-7 are dropped
    (these are the all-zero placeholder rows). The remaining rows are split,
    in order, into *n_phases* consecutive groups, and for each group the
    distribution of ``compare_betas(actual_basis, row)`` is drawn.
    """
    informative = [b for b in bases[:] if np.sum(np.abs(b)) > 1e-7]
    for phase_rows in np.array_split(informative, n_phases):
        errors = [compare_betas(actual_basis, b) for b in phase_rows]
        sns.distplot(errors, ax=ax)
    ax.legend(['Phase %d' % i for i in range(n_phases)], loc='best')
    ax.set_ylabel('Frequency [-]')
    ax.set_xlabel('Estimate error norm [-]')


def plot_dists(bases, ax):
    """Plot the distribution of each of the two basis components on *ax*,
    ignoring values that are exactly zero."""
    for col in (0, 1):
        sns.distplot([b for b in bases[:, col] if abs(b) > 0.0], ax=ax)
    ax.set_ylabel('Frequency [-]')
    ax.set_xlabel('basis value [-]')


def do_basis_plots(bases, actual_basis, axes):
    """Fill four axes with the basis diagnostics for one estimator.

    ``axes[0]`` gets the estimates with the reference levels, ``axes[1]`` the
    residuals, ``axes[2]`` the per-phase error distributions and ``axes[3]``
    the component distributions.
    """
    plot_bases(bases, axes[0], actual_basis)
    plot_residuals(bases, actual_basis, axes[1])
    plot_errors(bases, actual_basis, axes[2])
    plot_dists(bases, axes[3])
def do_analysis(eid, hl=0, lb=100):
    """Estimate and plot the score basis of one match over time.

    Parameters
    ----------
    eid : int
        Event ID; used as the key into the module-level ``PBP`` frame and in
        the figure title.
    hl : number, optional
        Half-life for exponentially weighted smoothing of the aggregate
        estimates. ``0`` (default) disables smoothing.
    lb : int, optional
        Look-back window, in events, for the windowed estimates.

    Side effects
    ------------
    Prints the basis for the first half, second half and full match, and
    creates a 5x2 matplotlib figure: scores across the top row, then four
    diagnostic rows with the aggregate estimator on the left and the windowed
    estimator on the right.
    """
    score_cols = ['points_h', 'points_a']
    data = PBP.loc[eid]
    X = data[score_cols].values
    n_events = X.shape[0]
    actual_basis = get_basis(X)

    # Windowed estimate at event i uses the lb events before it. The first
    # events have no full window and are padded with zeros. The padding is
    # capped at n_events so the result never has more rows than the match.
    n_pad = min(lb, n_events)
    bases_windowed = np.array(
        [np.zeros(2)] * n_pad
        + [get_basis(X[(i - lb):i, :]) for i in range(lb, n_events)]
    )
    # Aggregate estimate at event i uses everything strictly before it.
    bases_agg = np.array([get_basis(X[:i, :]) for i in range(n_events)])
    if hl > 0:
        # Fixed: this used to smooth the undefined name `bases`.
        # NOTE(review): pd.ewma is the legacy pandas API; newer pandas
        # releases expose this as DataFrame.ewm(...).mean() -- confirm the
        # pinned pandas version before upgrading.
        bases_agg = pd.ewma(bases_agg, halflife=hl)

    # NOTE(review): selecting by ['Q1', 'Q2'] / ['Q3', 'Q4'] presumably relies
    # on the remaining index level of `data` being the quarter label -- confirm.
    # Single-argument print() gives identical output under Python 2 and 3.
    first_half = get_basis(data.loc[['Q1', 'Q2']][score_cols].values)
    second_half = get_basis(data.loc[['Q3', 'Q4']][score_cols].values)
    print('First half:  %s' % (first_half,))
    print('Second half: %s' % (second_half,))
    print('Full match:  %s' % (actual_basis,))

    plt.figure(figsize=(16, 18))
    grid = (5, 2)
    ax = plt.subplot2grid(grid, (0, 0), colspan=2)
    ax.set_title('Event ID: %d' % eid)
    plot_scores(X, ax)

    # One column of four diagnostic panels per estimator. The windowed title
    # reports the actual look-back instead of a hard-coded 100.
    panels = (
        (0, 'Aggregate', bases_agg),
        (1, 'Windowed (%d)' % lb, bases_windowed),
    )
    for col, title, bases in panels:
        axes = [plt.subplot2grid(grid, (row, col)) for row in range(1, 5)]
        axes[0].set_title(title)
        do_basis_plots(bases, actual_basis, axes)
    plt.tight_layout()
# Run the analysis once per event ID, with default settings, in this order.
for _event_id in (
        2352891, 2352895, 2352899, 2352915, 2352926, 2352945, 2352956,
        2352958, 2352959, 2352963, 2352968, 2352969, 2352977, 2352978):
    do_analysis(_event_id)